# Import data ----

# Load packages and import the raw OD growth data.
library(dplyr)
library(tidyverse)
library(readxl)

# setwd("D:/PhD/01_Data/03_Vitro/01_In vitro growth of gut microbes in faeces") # Windows
# setwd("/Volumes/Yiming_Wang/PhD/01_Data/03_Vitro/01_In vitro growth of gut microbes in faeces") # Mac
# NOTE(review): hard-coded absolute path; consider relative paths or here::here()
# so the script is portable across machines.
setwd("/Users/godric/Desktop/Fut2 review comments/Revision") #Macbook pro

# Read the OD measurements and drop the KO genotype; only WT samples and
# Negative (media-only) controls are analysed below.
df.all <- read_excel("Data for AUC.xlsx") %>% 
  filter(Genotype != "KO")


# Load plotting/statistics packages (installs them if missing).
pacman::p_load(
  ggplot2,ggsci, ggthemes,PoweR
)

# Fix the RNG state so any randomised steps (e.g. point jitter) reproduce.
set.seed(123)

# Factor order for the treatment axis in the figures.
level_order_Treatment <- c("mBasal","mBasal + 2'-FL")

# My color panel: 9 JCO journal colours at 70% opacity.
mypal = pal_jco("default", alpha = 0.7)(9)
mypal
## [1] "#0073C2B2" "#EFC000B2" "#868686B2" "#CD534CB2" "#7AA6DCB2" "#003C67B2"
## [7] "#8F7700B2" "#3B3B3BB2" "#A73030B2"
library("scales")
show_col(mypal) #  #3C5488B2(WT) and  #E64B35B2 (KO)

# Negative control colours: #3B3B3BB2 #868686B2 

# Summary line charts: all samples, G vs NG ----

# All samples (G vs NG)
library(ggplot2)
library(ggthemes)
library (gridExtra)

# Reshape the non-control samples to long format: one row per
# (sample, timepoint, treatment) OD reading. The wide column name
# (e.g. "24hr_G") is split at the non-alphanumeric separator into
# hr + Treatment by separate().
df.1 <- df.all %>%
  filter(Genotype != "Negative") %>%
  gather(type, value, -c(No, ID, Genotype, Sex, GenotypeSex, Cage)) %>%
  separate(type, c("hr", "Treatment"), remove = TRUE)
# Drop the metadata columns by NAME rather than position: the original
# df.1[-c(3:5)] removed whichever columns happened to sit in slots 3-5
# (Genotype, Sex, GenotypeSex here) and would silently break if the
# input column order ever changed.
df.1 <- df.1 %>% select(-c(Genotype, Sex, GenotypeSex))
  

# Summarise data: mean and SD of OD per Treatment (G / NG) x timepoint,
# pooled across all samples.
# NOTE(review): the name `summary` masks base::summary(); consider renaming.
summary<-df.1 %>%
  group_by(Treatment,hr) %>%
  summarise(value.mean=mean(value), sd=sd(value))%>%
  ungroup()

# min.line: per timepoint, the lower of the two treatment mean curves.
# Used as the bottom edge of the shaded ribbon in the growth-curve figure.
min.line <- summary %>%
  select(-sd) %>%
  # "X24hr" / "24hr" -> numeric 24
  mutate(hr = as.numeric(gsub("hr", "", gsub("X", "", hr)))) %>%
  spread(Treatment, value.mean) %>%
  # Row-wise minimum of the two curves. pmin() replaces the original
  # group_by(hr) + min(G, NG) trick (which relied on one row per hr) and
  # avoids leaving the result grouped.
  mutate(min_line = pmin(G, NG)) %>%
  select(hr, min_line)

# Attach the numeric hour and the ribbon floor to the summary table, and
# tag these rows as real samples (the negative controls are added below).
summary.final <- summary %>%
  mutate(hr.new = as.numeric(gsub("hr", "", gsub("X", "", hr)))) %>%
  left_join(min.line, by = c("hr.new" = "hr")) %>%
  add_column(Group = "Sample")


# Gather negative control data: same reshape + mean/SD summarise as the
# samples, but for the media-only (Negative) wells. min_line is fixed at 0
# and Genotype is renamed to Group so the controls can be row-bound onto
# summary.final below.
neg.ctrl<-df.all %>%
  filter(Genotype == "Negative")%>%
  gather(type,value,-c(No,ID,Genotype,Sex,GenotypeSex,Cage))%>%
  separate(type,c("hr","Treatment"),remove=T)%>%
  group_by(Genotype,Treatment,hr) %>%
  summarise(value.mean=mean(value),sd=sd(value))%>% # Mean and standard deviation per treatment x timepoint for the figure
  ungroup()%>%
  mutate(group=paste(Treatment,Genotype,sep=" "))%>%
  mutate(hr.new=as.numeric(gsub("hr","",gsub("X","",hr))))%>%
  mutate(min_line=0) %>% 
  select(-c(group)) %>% 
  rename(Group = Genotype)
  # Earlier approach kept for reference:
  # select(Genotype,hr,Treatment,value.mean)%>%
  # rename(Genotype=ID)%>%
  # mutate(sd=0)#The negative control is under ID column, to bind negative control and my other data, the ID column should be renamed to Genotype

# Bind samples and negative controls, then recode the G/NG x Group
# combinations into the human-readable treatment labels used in the legend.
summary.final2 <- bind_rows(summary.final, neg.ctrl)
summary.final2 <- summary.final2 %>%
  mutate(Combine = paste(Treatment, Group, sep = " ")) %>%
  # case_when() replaces the original four-level nested ifelse();
  # "ditch" marks any unexpected combination so it is easy to spot.
  mutate(Treatment = case_when(
    Combine == "G Negative"  ~ "Media control + 2'-FL",
    Combine == "NG Negative" ~ "Media control",
    Combine == "G Sample"    ~ "mBasal + 2'-FL",
    Combine == "NG Sample"   ~ "mBasal",
    TRUE                     ~ "ditch"
  ))


# AUC of the two mean curves (trapezoid rule) and their difference.
# NOTE(review): `area.difference` relies on summarise() returning Treatment
# in alphabetical order (G before NG), so the value is AUC(G) - AUC(NG).
# The result is not used downstream (the figure annotation is hard-coded);
# kept for reference. The duplicated ungroup()/group_by(Treatment) calls of
# the original were redundant and have been removed.
calculus <- summary.final %>%
  group_by(Treatment) %>%
  arrange(Treatment, hr.new) %>% # Sort within treatment by time
  # Trapezoid per interval: (y1 + y2) / 2 * width
  mutate(sum.value = value.mean + lag(value.mean),
         hr.interval = hr.new - lag(hr.new)) %>%
  mutate(area = sum.value * hr.interval / 2) %>%
  summarise(total.area = sum(area, na.rm = TRUE)) %>% # Total AUC per treatment
  ungroup() %>%
  mutate(area.difference = lag(total.area) - total.area) %>%
  filter(!is.na(area.difference)) %>%
  select(area.difference) # No ID survives summarise(), so this cannot be re-joined to the samples.


# Growth-curve figure: mean OD600 over time for each of the four treatments,
# SD error bars (geom_pointrange), and the region between the curves shaded
# down to min_line (0 for the media controls).
# NOTE(review): `size =` in geom_line()/element_rect() is deprecated in favour
# of `linewidth =` in ggplot2 >= 3.4 -- confirm the installed version.
figure_all <- ggplot(data=subset(summary.final2), aes(x=hr.new,y=value.mean,group=Treatment))+
      geom_line(size=1,aes(color=Treatment)) +
      geom_point(size=2,aes(color=Treatment,shape=Treatment)) +
      geom_pointrange(aes(ymin=value.mean-sd, ymax=value.mean+sd,color=Treatment)) + # error bar method 1 (just line)
      theme_bw() +
      labs(x="Time (hr)",y=bquote(OD[600]),caption = "",
           title = paste0(""))+ # Faeces collected from KO mice, you can use "title = paste0(k,"+ Negative")
      theme(legend.position="bottom",
            legend.title=element_blank(),
            legend.text = element_text(size=14),
        axis.text.x=element_text(colour="black", face="plain", size=12), 
        axis.text.y=element_text(colour="black", face="plain", size=12),
        axis.title.x=element_text(margin = margin(t = 5), size=12),
        axis.title.y=element_text(margin = margin(r = 5), size=12),
        axis.title = element_text(face="plain", size = 10),
        plot.title = element_text(size=10, hjust = 0.5),
        panel.grid = element_blank(),
        panel.border =  element_rect(color = "black",
                                    fill = NA,
                                    size = 1),
        aspect.ratio = 1,
        plot.background = element_rect(fill="transparent", colour = "transparent"))+
      scale_y_continuous(limits = c(0,0.6),breaks = seq(0,0.6,0.1))+
      scale_x_continuous(limits=c(0,120),breaks=seq(0,120,24))+
      # NOTE(review): colours are matched to treatments by the locale-dependent
      # alphabetical factor order; a named vector (values = c("mBasal" = ..., ...))
      # would make the mapping explicit and portable.
      scale_colour_manual(values=c("#b86877","#70a1a7","#3B3B3BB2","#868686B2")) +
      geom_ribbon(aes(ymax=value.mean, ymin=min_line),fill="#cfcfcf",alpha = 0.2)+ # ivory3, seashell3, gray66, #87c0e6, #ffa0aa, #808080
      guides(color=guide_legend(nrow=2,byrow=TRUE))+
      # Hard-coded P value from the Wilcoxon test on the AUCs (see below).
      annotate("text",x=60,y=0.6,
             label=expression(italic("P") < "0.0001"),hjust=0.5, size=4)

figure_all

# Calculate area (AUC) of each sample ----

# Per-sample AUC via the trapezoid rule: for each (ID, Treatment) growth
# curve, each interval contributes (y1 + y2) / 2 * width; summing gives the
# total area. spread() leaves one row per ID with a G and an NG column.
calculus2 <- df.1 %>%
  mutate(hr.new = as.numeric(gsub("hr", "", gsub("X", "", hr)))) %>%
  group_by(ID, Treatment) %>%
  arrange(ID, Treatment, hr.new) %>%
  mutate(
    sum.value   = value + lag(value),       # sum of the two base values
    hr.interval = hr.new - lag(hr.new),     # trapezoid height (time step)
    area        = sum.value * hr.interval / 2
  ) %>%
  summarise(total.area = sum(area, na.rm = TRUE)) %>%
  ungroup() %>%
  spread(Treatment, total.area)

set.seed(123)

# Choosing a normality test
  ## Answer: D'Agostino-Pearson normality test ("omnibus K2" test)
  ## Reasons: It first computes the skewness and kurtosis to quantify how far the distribution is from Gaussian in terms of asymmetry and shape.  It then calculates how far each of these values differs from the value expected with a Gaussian distribution, and computes a single P value from the sum of these discrepancies. It is a versatile and powerful normality test. (https://www.graphpad.com/guides/prism/latest/statistics/stat_choosing_a_normality_test.htm)

# D'Agostino-Pearson normality test ("omnibus K2" test)
  ## Null (H0): the data are sampled from a Gaussian (normal) distribution; Alternative (H1): they are not.
  ## A low p-value (<0.05) means we reject the null hypothesis and conclude the data are not sampled from a Gaussian population.
  ## https://www.rdocumentation.org/packages/PoweR/versions/1.0.7/topics/statcompute
 
# Example: list the test indices; in the Stat table below, index 6 is K^2.
library(PoweR)
getindex() # index 6 is D'Agostino-Pearson normality test ("omnibus K2" test)
##    Index                            Law Nbparams Default1 Default2 Default3
## 1      1                  Laplace(mu,b)        2 0.000000        1       NA
## 2      2               Normal(mu,sigma)        2 0.000000        1       NA
## 3      3               Cauchy(mu,sigma)        2 0.000000        1       NA
## 4      4             Logistic(mu,sigma)        2 0.000000        1       NA
## 5      5              Gamma(shape,rate)        2 2.000000        1       NA
## 6      6                      Beta(a,b)        2 1.000000        1       NA
## 7      7                   Uniform(a,b)        2 0.000000        1       NA
## 8      8                  Student-t(df)        1 1.000000       NA       NA
## 9      9                Chi-squared(df)        1 1.000000       NA       NA
## 10    10       Lognormal(logmean,logsd)        2 0.000000        1       NA
## 11    11           Weibull(shape,scale)        2 1.000000        1       NA
## 12    12             ShiftedExp(l,rate)        2 0.000000        1       NA
## 13    13                        U^{j+1}        1 1.000000       NA       NA
## 14    14                 AveUnif(k,a,b)        3 2.000000        0      1.0
## 15    15                       UUnif(j)        1 1.000000       NA       NA
## 16    16                       VUnif(j)        1 1.000000       NA       NA
## 17    17           JSU(mu,sigma,nu,tau)        4 0.000000        1      0.0
## 18    18                       Tukey(l)        1 1.000000       NA       NA
## 19    19                    LoConN(p,m)        2 0.200000        3       NA
## 20    20                       JSB(g,d)        2 0.000000        1       NA
## 21    21          SkewN(xi,omega,alpha)        3 0.000000        1      0.0
## 22    22                    ScConN(p,d)        2 0.200000        2       NA
## 23    23                GP(mu,sigma,xi)        3 0.000000        1      0.0
## 24    24                GED(mu,sigma,p)        3 0.000000        1      1.0
## 25    25        Stable(alpha,beta,c,mu)        4 1.000000        0      1.0
## 26    26               Gumbel(mu,sigma)        2 1.000000        1       NA
## 27    27        Frechet(mu,sigma,alpha)        3 0.000000        1      1.0
## 28    28               GEV(mu,sigma,xi)        3 0.000000        1      0.0
## 29    29                GArcSine(alpha)        1 0.500000       NA       NA
## 30    30                FoldN(mu,sigma)        2 0.000000        1       NA
## 31    31                    MixN(p,m,d)        3 0.500000        0      1.0
## 32    32                    TruncN(a,b)        2 0.000000        1       NA
## 33    33                        Nout(a)        1 1.000000       NA       NA
## 34    34             GEP(t1,t2,t3,crit)        4 0.500000        0      1.0
## 35    35            Exponential(lambda)        1 1.000000       NA       NA
## 36    36               ALaplace(mu,b,k)        3 0.000000        1      2.0
## 37    37       NIG(alpha,beta,delta,mu)        4 1.000000        0      1.0
## 38    38    APD(theta,phi,alpha,lambda)        4 0.000000        1      0.5
## 39    39 modAPD(mu,sigma,theta1,theta2)        4 0.000000        1      0.5
## 40    40           LPtn(alpha,mu,sigma)        3 1.959964        0      1.0
##    Default4
## 1        NA
## 2        NA
## 3        NA
## 4        NA
## 5        NA
## 6        NA
## 7        NA
## 8        NA
## 9        NA
## 10       NA
## 11       NA
## 12       NA
## 13       NA
## 14       NA
## 15       NA
## 16       NA
## 17    5e-01
## 18       NA
## 19       NA
## 20       NA
## 21       NA
## 22       NA
## 23       NA
## 24       NA
## 25    0e+00
## 26       NA
## 27       NA
## 28       NA
## 29       NA
## 30       NA
## 31       NA
## 32       NA
## 33       NA
## 34    1e-06
## 35       NA
## 36       NA
## 37    0e+00
## 38    2e+00
## 39    2e+00
## 40       NA
##    Index                Stat Alter Nbparams
## 1      1                 K-S     3        0
## 2      2                AD^*     3        0
## 3      3                 Z_C     3        0
## 4      4                 Z_A     3        0
## 5      5                 P_S     3        0
## 6      6                 K^2     3        0
## 7      7                  JB     3        0
## 8      8                  DH     3        0
## 9      9                 RJB     3        0
## 10    10            T_{Lmom}     3        0
## 11    11      T_{Lmom}^{(1)}     3        0
## 12    12      T_{Lmom}^{(2)}     3        0
## 13    13      T_{Lmom}^{(3)}     3        0
## 14    14            BM_{3-4}     3        0
## 15    15            BM_{3-6}     3        0
## 16    16           T_{MC-LR}     3        0
## 17    17                 T_w 0,1,2        0
## 18    18       T_{MC-LR}-T_w     3        0
## 19    19             T_{S,5}     3        0
## 20    20             T_{K,5}     3        0
## 21    21                   W     4        0
## 22    22                  W'     4        0
## 23    23            tilde{W}     4        0
## 24    24                   D 0,1,2        0
## 25    25                   r     4        0
## 26    26                  CS     3        0
## 27    27                   Q 0,1,2        0
## 28    28                Q-Q* 0,1,2        0
## 29    29                BCMR     3        0
## 30    30            beta_3^2     3        0
## 31    31          T^*(alpha)     1        1
## 32    32                 I_n     3        0
## 33    33              R_{sJ}     3        0
## 34    34                  Q* 0,1,2        0
## 35    35                 R_n     3        0
## 36    36             X_{APD}     3        0
## 37    37             Z_{EPD} 0,1,2        0
## 38    38                 GLB     3        0
## 39    39              V_3-ML 0,1,2        0
## 40    40              V_4-ML 0,1,2        0
## 41    41                   S     3        0
## 42    42                 A^2     3        0
## 43    43                 W^2     3        0
## 44    44                 U^2     3        0
## 45    45            sqrt{n}D     3        0
## 46    46                   V     3        0
## 47    47    T_{n,a}^{(1)}-MO     3        1
## 48    48    T_{n,a}^{(1)}-ML     3        1
## 49    49    T_{n,a}^{(2)}-MO     3        1
## 50    50    T_{n,a}^{(2)}-ML     3        1
## 51    51         T_{m,n}^{V}     4        1
## 52    52         T_{m,n}^{E}     4        1
## 53    53         T_{m,n}^{C}     4        1
## 54    54            hat{G}_n     3        0
## 55    55                 V_3 0,1,2        0
## 56    56                 V_4 0,1,2        0
## 57    57                 K_1     3        0
## 58    58                   T     3        0
## 59    59                   Z     3        0
## 60    60                   K     3        0
## 61    61              DLLap1 0,1,2        0
## 62    62              DLLap2 0,1,2        0
## 63    63                 D_n     3        0
## 64    64           W_{n}^{2}     3        0
## 65    65           A_{n}^{2}     3        0
## 66    66                 C_n     3        0
## 67    67                 K_n     3        0
## 68    68                 T_1     3        0
## 69    69                 T_2     3        0
## 70    70                G(n)     3        0
## 71    71                   Q     3        0
## 72    72        2nI^{lambda}     3        1
## 73    73                M(n)     3        0
## 74    74         L_{n}^{(m)}     4        1
## 75    75         S_{n}^{(m)}     3        1
## 76    76              H(m,n)     4        1
## 77    77            A^{*}(n)     3        0
## 78    78 D_{n,m}(phi_lambda)     3        2
## 79    79             E_{m,n}     3        1
## 80    80    T_{n,m}^{lambda}     3        2
## 81    81                 Z_A     3        0
## 82    82                 Z_C     3        0
## 83    83              t test 0,1,2        1
## 84    85              DLLap3     3        0
## 85    86  T(alpha_1,alpha_2)     3        1
## 86    87                 T_n     3        0
## 87    88       T^{LS}(alpha)     3        1
## 88    89  T^{LS}_3(mu,alpha)     3        2
## 89    90  T^{LS}_4(alpha,mu)     3        2
## 90    91                GV_1 0,1,2        0
## 91    92                GV_2 0,1,2        0
## 92    93                Ho_K 0,1,2        0
## 93    94                Ho_U 0,1,2        0
## 94    95                Ho_V 0,1,2        0
## 95    96                Ho_W 0,1,2        0
## 96    97                SR^* 0,1,2        0
# K^2 normality test on the 2'-FL (G) AUC values; decision = 1 means reject
# normality at alpha = 0.05, motivating the non-parametric tests below.
statcompute(6,calculus2$G, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] 10.62906
## 
## $pvalue
## [1] 0
## 
## $decision
## [1] 1
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
# Same K^2 test on the mBasal (NG) AUC values.
# NOTE(review): the cached output shows K^2 = 1.29 yet p = 0 -- a statistic
# that small would not normally reject; re-run to confirm this result.
statcompute(6,calculus2$NG, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] 1.29134
## 
## $pvalue
## [1] 0
## 
## $decision
## [1] 1
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
# Long format for the tests and figures: one row per (sample, treatment)
# AUC, with the G/NG codes recoded to the media names shown in the plots.
calculus3 <- calculus2 %>%
  gather("Treatment", "Area", -ID) %>%
  mutate(Treatment = ifelse(Treatment == "NG", "mBasal", "mBasal + 2'-FL"))

## Summary
# Per-treatment descriptive statistics of the AUC: n, median, IQR and
# quartiles (median/IQR rather than mean/SD because the K^2 tests above
# rejected normality).
Summary.area <- calculus3 %>% 
  group_by(Treatment) %>% 
  summarise(
    count = n(),
    median= median (Area, na.rm=TRUE),
    IQR = IQR (Area,na.rm=TRUE),
    IQR25 = quantile(Area, 0.25),
    IQR75 = quantile(Area, 0.75)
  )
Summary.area
## # A tibble: 2 × 6
##   Treatment      count median   IQR IQR25 IQR75
##   <chr>          <int>  <dbl> <dbl> <dbl> <dbl>
## 1 mBasal            32   15.1  2.20  14.3  16.5
## 2 mBasal + 2'-FL    32   35.6  5.46  33.3  38.7
## Area difference between mBasal and mBasal + 2'-FL
# Two-sided Wilcoxon rank-sum (Mann-Whitney) test on the AUC values.
# NOTE(review): paired = FALSE here, but each ID contributes one AUC per
# treatment, so a paired signed-rank test may be the intended design --
# compare_means() below uses paired = TRUE; the two should be reconciled.
stats <- wilcox.test(Area ~ Treatment, data=calculus3,
                     paired=FALSE, correct=TRUE, conf.int=TRUE, conf.level=0.95, exact=FALSE)
stats
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Area by Treatment
## W = 0, p-value = 6.508e-12
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -21.86401 -18.52206
## sample estimates:
## difference in location 
##              -20.16002
# Effect size
# Wilcoxon r effect size (r = Z / sqrt(n)); 0.859 is reported as "large".
library (rstatix)
set.seed(123)
effectsize <- calculus3 %>% wilcox_effsize(Area ~ Treatment)
effectsize
## # A tibble: 1 × 7
##   .y.   group1 group2         effsize    n1    n2 magnitude
## * <chr> <chr>  <chr>            <dbl> <int> <int> <ord>    
## 1 Area  mBasal mBasal + 2'-FL   0.859    32    32 large
# Method 2: ggpubr's compare_means, convenient for adding the P value to a figure.
## P value
library(ggpubr)
set.seed(123)
# NOTE(review): paired = TRUE here, but the wilcox.test() above used
# paired = FALSE -- choose one design consistently (both give p < 0.0001,
# but the exact values differ).
compare_means(Area ~ Treatment, data=calculus3,method = "wilcox.test", paired=TRUE, group.by = NULL, ref.group=NULL)
## # A tibble: 1 × 8
##   .y.   group1         group2        p         p.adj p.format p.signif method  
##   <chr> <chr>          <chr>     <dbl>         <dbl> <chr>    <chr>    <chr>   
## 1 Area  mBasal + 2'-FL mBasal 4.66e-10 0.00000000047 4.7e-10  ****     Wilcoxon
# Factor order for the x-axis (re-declares the same value set near the top of the script).
level_order_Treatment <- c("mBasal","mBasal + 2'-FL")

# Visualization
# Boxplot of the per-sample AUC by treatment, with jittered individual points.
# install.packages("hrbrthemes")
# library(hrbrthemes)
library(viridis)
# NOTE(review): `size =` inside element_rect() is deprecated in favour of
# `linewidth =` in ggplot2 >= 3.4 -- confirm the installed version.
figure.area <- ggplot(data=calculus3, mapping = aes(y=Area, x=factor(Treatment,level=level_order_Treatment)))+
  geom_boxplot(outlier.shape = NA)+ # outliers hidden; the jittered points already show every sample
  theme_bw()+
  geom_jitter(aes(colour=Treatment),
              size=2,alpha=0.9,
              position=position_jitterdodge(jitter.width = 0.5,
                                            jitter.height = 0, 
                                            dodge.width = 0.8))+
  labs(x="",y="Area size",caption = "",
      title = "")+
  theme(legend.position="none",
        axis.title = element_text(face="plain", size = 10),
        axis.title.x=element_text(margin = margin(t = 5)),
        axis.title.y=element_text(margin = margin(r = 5), size=16),
        axis.text.x=element_text(colour="black", face="plain", size=14), 
        axis.text.y=element_text(colour="black", face="plain", size=14),
        plot.title = element_text(size=10, hjust = 0.5),
        panel.grid = element_blank(),
        panel.border =  element_rect(color = "black",
                                    fill = NA,
                                    size = 1),
        aspect.ratio = 1,
        plot.background = element_rect(fill = "transparent", colour = "transparent"))+
  scale_y_continuous(limits = c(0,50),breaks = seq(0,50,10))+
  scale_colour_manual(values=c("#b86877","#70a1a7"))
  

figure.area

# Leftover annotation template kept for reference (would print the computed
# area difference from `calculus` instead of a hard-coded P value):
#       annotate("text",x=0,y=0.6,
#              label=paste0("Total Area = ",round(as.vector(subset(calculus,Genotype==k,select=area.difference)),2)),
#              hjust=0,size=3)
#   
# Add the Wilcoxon P value (hard-coded from the tests above) centred between
# the two boxes (x = 1.515) at the top of the y-axis range.
figure.area.final <- figure.area + 
  annotate("text",x=1.515,y=50,
           label=expression(italic("P") < "0.0001"),hjust=0.5, size=4.5)
figure.area.final